/* Session setup: close any log that is still open and empty memory. */
capture log close
clear all
/* Switch off -pause- and the -more- prompt. */
pause off
set more off
/* Raise the variable limit (memory must be empty, hence after clear all). */
set maxvar 10000
*set matsize 11000
/* From here on every command ends with a semicolon. */
# delimit;

/* Root-folder macros. Both are set to empty strings in this copy of the script. */
/* data is not referenced in the part of the script that follows. */
local data "";
local crime "";

/* 
Sample of SSI children 1990-2012, prep 1
*/

/* ********************************** */
/* **** PREP 831 FILES FOR MERGE **** */
/* ********************************** */
/* Boolean expression that is true for the 24 states kept in the sample.
   inlist() accepts at most ten string arguments, so the states are split
   across three calls joined with the or operator. */
local state_list 
		inlist(state, "ar", "az", "ca", "co", "ct", "fl", "il", "in") | 
		inlist(state, "ks", "md", "mi", "mn", "ms", "nc", "nd", "ne") | 
		inlist(state, "nj", "oh", "or", "pa", "tx", "va", "wa", "wi") ;

/* State codes for looping. va is left out of both lists because it uses
   different merge variables and is handled separately. */
/* Full list of the non-VA states (not referenced in the visible part of the script). */
local foreach_st 		"ar az ca co ct fl ks il in md mi mn ms nc nd ne nj oh or pa tx wa wi" ;
/* Same list without ar, which is loaded first and serves as the base for the appends. */
local foreach_st_no1 	"   az ca co ct fl ks il in md mi mn ms nc nd ne nj oh or pa tx wa wi" ;

/* Build the cleaned 831 extract that every later merge step reads:
   restrict by birth year and state, create a record id, standardise
   the name fields, and save the result. */
use 	 "`crime'\allssr_crimekid.dta", clear;

/* ****** TO REDUCE FILE SIZE, LIMIT TO KIDS WITH YOB 1976-1998 ******* */
gen dobyy = year(dob) ;
tab dobyy , m ;
keep if dobyy >=1976 & dobyy <=1998 ;
/* Month and day of birth are kept separately because the VA match keys use them. */
gen dob_mm = month(dob) ;
gen dob_dd = day(dob) ;

/* Get states: the first two characters of pdscc are converted to a number
   and looked up in the state code crosswalk. */
gen state_ssa = substr(pdscc,1,2) ; 
destring state_ssa, replace ;
/* NOTE(review): the local credit is never defined in the visible part of this
   script (only crime and data are), so it expands to an empty string here.
   Confirm which root folder holds state_codes.dta. */
merge m:1 state_ssa using "`credit'\state_codes.dta", keepusing(state_abbrev) ;
/* Drop crosswalk rows that matched no record in memory. */
drop if _merge == 2 ;
drop _merge ;		
gen state=lower(state_abbrev);
drop state_abbrev state_ssa ;

/* Keep only states in the crime data */
keep if `state_list' ;

/* RECLINK MERGE ID */
/* Stored as long: the default float type cannot represent every integer
   above 16,777,216, which would give distinct rows the same id in a large file. */
gen long idu = _n;

/* Format names in 831 */
rename fssr_lastn last_name;
rename fssr_firstn first_name;
/* A lone dash is treated as no middle name. Only the first character is kept. */
replace fssr_middlen="" if fssr_middlen=="-";
gen middle_name = substr(fssr_middlen, 1, 1);

/* Drop empty records */
drop if last_name=="";
drop if first_name=="";
drop if dob==. ;

/* Replace special chars: every listed character, including the blank,
   is removed from the three name fields. */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
			   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};

/* NOTE(review): the match keys defined later in this script start with st,
   while the variable created here is named state. Confirm that st resolves
   to the intended variable in this file. */
save "`crime'\831\crime_allssr_formerge_prep.dta", replace;


/* **************************************************************** */
/* **** NEW MERGE PROCESS [based on "reclink" ado source code] **** */
/* **************************************************************** */	


/* Match keys for the exact merges. Every key list starts with the same
   state and name fields and then differs in the middle name and birth date parts. */
/* NOTE(review): the 831 prep step creates a variable named state, not st.
   Confirm that st resolves to the intended variable in both files. */
local key_base "st last_name first_name";

/* Part1: 1st best match */
local required_var1 "`key_base' middle_name dob";
/* Part2: 2nd best match (no middle name) */
local required_var2 "`key_base' dob";
/* Part1 for VA: VA only has month and day of birth */
local required_var1va "`key_base' middle_name dob_mm dob_dd" ;
/* Part2 for VA: VA only has month and day of birth */
local required_var2va "`key_base' dob_mm dob_dd" ;


/* ***************************************************************** */
/* ************* MERGE ALL STATES EXCEPT FOR VA ******************** */
/* ***************************************************************** */

/* ******************************** */
/* STEP 1: "USING File" Preparation */	
/* ******************************** */

/* ---------------------------------------------------------------------- */
/* PART 1 keys: state, last_name, first_name, middle_name, dob */
/* Two files are written per part:
   - a restore file holding every 831 record plus its group id unique_idu
   - a collapsed file holding one row per distinct key combination */
use "`crime'\831\crime_allssr_formerge_prep.dta", clear;
drop if state == "va" ; 										/* **** DROP VA FROM DATA **** */
sort `required_var1';
/* Group id over the key variables. The missing option also groups records with missing key values. */
egen unique_idu = group(`required_var1'), missing;
/* The 831 middle name is stored as Umiddle_name in the restore file so it
   does not share a name with the middle_name of the file it is merged into. */
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt1;
save "`using_file_for_restore_pt1'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var1' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var1';
tempfile using_file_unique_merge_pt1;
/* Quoted like the other saves so a temp path containing blanks still works. */
save "`using_file_unique_merge_pt1'", replace;

/* ---------------------------------------------------------------------- */
/* PART 2 keys: state, last_name, first_name, dob (no middle name) */
use "`crime'\831\crime_allssr_formerge_prep.dta", clear;
drop if state == "va" ; 										/* **** DROP VA FROM DATA **** */
sort `required_var2';
egen unique_idu = group(`required_var2'), missing;
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt2;
save "`using_file_for_restore_pt2'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var2' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var2';
tempfile using_file_unique_merge_pt2;
save "`using_file_unique_merge_pt2'", replace;


/* **************************************** */
/* STEP 2: Execute - Identify Exact Matches */	
/* **************************************** */

/* #### PART 1 START #### */
/* Exact match of the non-VA identifier records that have a middle initial
   against the 831 records, using the Part 1 keys. */

/* Stack the per-state identifier files: ar is the base, the rest are appended. */
use "`crime'\merge\cjars_identifier_ar.dta", clear;
foreach st in 	`foreach_st_no1' { ;
	append using "`crime'\merge\cjars_identifier_`st'.dta" ;
} ;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row id assigned before any record is dropped. Stored as long because the
   default float type cannot represent every integer above 16,777,216. */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the same special characters that were removed from the 831 names. */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

/* Reduce the middle name to its first character, as in the 831 file. */
replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 1 handles only records that have a middle initial. */
keep if middle_name != "";
/* ******************** */

/* Following the reclink ado source code and use the old merge syntax */
/* nokeep ignores using rows without a master match. Only matched rows are kept. */
sort `required_var1';
merge `required_var1' using "`using_file_unique_merge_pt1'", nokeep;
keep if _merge == 3;
drop _merge;

local perfectn1 = c(N);
noi di as result _n "`perfectn1' perfect matches found in Part 1" _n;

/* Merge back all using observations from the restore file via unique_idu. */
/* NOTE(review): when several rows in memory and several restore rows share a
   unique_idu, the old-style match merge pairs them row by row instead of
   forming every combination. Confirm this is the intended behaviour. */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt1'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

/* Tag the rows with the matching step that produced them. */
gen byte part=1;
tempfile allssr_merge_pt1 ;
/* Quoted so a temp path containing blanks still works. */
save "`allssr_merge_pt1'" ;
count;

/* #### PART 1 END #### */
/* ---------------------------------------------------------------------- */
/* #### PART 2 START #### */
/* Exact match of the non-VA identifier records that have no middle initial
   against the 831 records, using the Part 2 keys (no middle name). */

/* Stack the per-state identifier files: ar is the base, the rest are appended. */
use "`crime'\merge\cjars_identifier_ar.dta", clear;
foreach st in 	`foreach_st_no1' { ;
	append using "`crime'\merge\cjars_identifier_`st'.dta" ;
} ;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row id assigned before any record is dropped. Stored as long because the
   default float type cannot represent every integer above 16,777,216. */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the same special characters that were removed from the 831 names. */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

/* Reduce the middle name to its first character, as in the 831 file. */
replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 2 handles only records without a middle initial. */
keep if middle_name == "";
/* ******************** */

/* Following the reclink ado source code and use the old merge syntax */
/* nokeep ignores using rows without a master match. Only matched rows are kept. */
sort `required_var2';
merge `required_var2' using "`using_file_unique_merge_pt2'", nokeep;
keep if _merge == 3;
drop _merge;

local perfectn2 = c(N);
noi di as result _n "`perfectn2' perfect matches found in Part 2" _n;

/* Merge back all using observations from the restore file via unique_idu. */
/* NOTE(review): when several rows in memory and several restore rows share a
   unique_idu, the old-style match merge pairs them row by row instead of
   forming every combination. Confirm this is the intended behaviour. */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt2'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

/* Tag the rows with the matching step that produced them. */
gen byte part=2;
tempfile allssr_merge_pt2 ;
/* Quoted so a temp path containing blanks still works. */
save "`allssr_merge_pt2'" ;









/* ***************************************************************** */
/* ************************ MERGE VA SEPARATELY ******************** */
/* ***************************************************************** */

/* ******************************** */
/* STEP 1: "USING File" Preparation */	
/* ******************************** */

/* ---------------------------------------------------------------------- */
/* PART 1 keys for VA: state, last_name, first_name, middle_name, dob_mm, dob_dd */
/* Two files are written per part:
   - a restore file holding every VA 831 record plus its group id unique_idu
   - a collapsed file holding one row per distinct key combination */
use "`crime'\831\crime_allssr_formerge_prep.dta", clear;
keep if state == "va" ; 										/* **** keep only VA! **** */
sort `required_var1va';
/* Group id over the key variables. The missing option also groups records with missing key values. */
egen unique_idu = group(`required_var1va'), missing;
/* The 831 middle name is stored as Umiddle_name in the restore file so it
   does not share a name with the middle_name of the file it is merged into. */
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt1va;
save "`using_file_for_restore_pt1va'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var1va' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var1va';
tempfile using_file_unique_merge_pt1va;
/* Quoted like the other saves so a temp path containing blanks still works. */
save "`using_file_unique_merge_pt1va'", replace;

/* ---------------------------------------------------------------------- */
/* PART 2 keys for VA: state, last_name, first_name, dob_mm, dob_dd (no middle name) */
use "`crime'\831\crime_allssr_formerge_prep.dta", clear;
keep if state == "va" ; 										/* **** keep only VA! **** */
sort `required_var2va';
egen unique_idu = group(`required_var2va'), missing;
rename middle_name Umiddle_name;
sort unique_idu;
tempfile using_file_for_restore_pt2va;
save "`using_file_for_restore_pt2va'", replace;

/* Collapse the using dataset down to one row per unique key combination */
rename Umiddle_name middle_name;
keep `required_var2va' unique_idu;
bysort unique_idu: keep if _n == 1;
sort `required_var2va';
tempfile using_file_unique_merge_pt2va;
save "`using_file_unique_merge_pt2va'", replace;


/* **************************************** */
/* STEP 2: Execute - Identify Exact Matches */	
/* **************************************** */

/* #### PART 1 START #### */
/* Exact match of the VA identifier records that have a middle initial
   against the VA 831 records, using the VA Part 1 keys
   (month and day of birth instead of the full date). */
use "`crime'\merge\cjars_identifier_va.dta", clear;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row id assigned before any record is dropped. Stored as long because the
   default float type cannot represent every integer above 16,777,216. */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the same special characters that were removed from the 831 names. */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

/* Reduce the middle name to its first character, as in the 831 file. */
replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 1 handles only records that have a middle initial. */
keep if middle_name != "";
/* ******************** */

/* Following the reclink ado source code and use the old merge syntax */
/* nokeep ignores using rows without a master match. Only matched rows are kept. */
sort `required_var1va';
merge `required_var1va' using "`using_file_unique_merge_pt1va'", nokeep;
keep if _merge == 3;
drop _merge;

local perfectn1 = c(N);
noi di as result _n "`perfectn1' perfect matches found in Part 1" _n;

/* Merge back all using observations from the restore file via unique_idu. */
/* NOTE(review): when several rows in memory and several restore rows share a
   unique_idu, the old-style match merge pairs them row by row instead of
   forming every combination. Confirm this is the intended behaviour. */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt1va'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

/* Tag the rows with the matching step that produced them. */
gen byte part=1;
tempfile allssr_merge_pt1va ;
/* Quoted so a temp path containing blanks still works. */
save "`allssr_merge_pt1va'" ;
count;

/* #### PART 1 END #### */
/* ---------------------------------------------------------------------- */
/* #### PART 2 START #### */
/* Exact match of the VA identifier records that have no middle initial
   against the VA 831 records, using the VA Part 2 keys
   (no middle name, month and day of birth instead of the full date). */
use "`crime'\merge\cjars_identifier_va.dta", clear;
rename lastn last_name ;
rename firstn first_name ;
rename middlen middle_name ;
/* Row id assigned before any record is dropped. Stored as long because the
   default float type cannot represent every integer above 16,777,216. */
gen long idm = _n;
drop if last_name=="";
drop if first_name=="";

/* Remove the same special characters that were removed from the 831 names. */
foreach sp_char in "." "," ";" "&" "#" "(" ")" "'" ":" "-" 
		   "{" "}" "*" "[" "]" "`" "'" " " {;
	qui replace last_name   = subinstr(last_name,   "`sp_char'", "",.);
	qui replace first_name  = subinstr(first_name,  "`sp_char'", "",.);
	qui replace middle_name = subinstr(middle_name, "`sp_char'", "",.);
};
count;

/* Reduce the middle name to its first character, as in the 831 file. */
replace middle_name = substr(middle_name,1,1);

/* ******************** */
/* Part 2 handles only records without a middle initial. */
keep if middle_name == "";
/* ******************** */

/* Following the reclink ado source code and use the old merge syntax */
/* nokeep ignores using rows without a master match. Only matched rows are kept. */
sort `required_var2va';
merge `required_var2va' using "`using_file_unique_merge_pt2va'", nokeep;
keep if _merge == 3;
drop _merge;

local perfectn2 = c(N);
noi di as result _n "`perfectn2' perfect matches found in Part 2" _n;

/* Merge back all using observations from the restore file via unique_idu. */
/* NOTE(review): when several rows in memory and several restore rows share a
   unique_idu, the old-style match merge pairs them row by row instead of
   forming every combination. Confirm this is the intended behaviour. */
sort unique_idu;
merge unique_idu using "`using_file_for_restore_pt2va'", nokeep;
keep if _merge == 3;
drop _merge unique_idu;
order Umiddle_name, a(middle_name);

/* Tag the rows with the matching step that produced them. */
gen byte part=2;
tempfile allssr_merge_pt2va ;
/* Quoted so a temp path containing blanks still works. */
save "`allssr_merge_pt2va'" ;






/* Append all parts */
/* The VA Part 2 result is the data in memory at this point. The other three
   results are stacked below it. Tempfile names are quoted so a temp path
   containing blanks still works. */
append using "`allssr_merge_pt1va'";
append using "`allssr_merge_pt1'";
append using "`allssr_merge_pt2'";


/* Sort and drop duplicate merges */
/* Nothing is written when no match was found at all. */

if _N>0 { ;
	/* Within each cjars_id and hun pair the lowest part sorts first, so a
	   Part 1 match is the one retained over a Part 2 match. The stable option
	   keeps tied rows in their current order, so the surviving row is the
	   same on every run instead of depending on a random tie break. */
	sort cjars_id hun part, stable ;
	duplicates drop cjars_id hun, force ;

	/* Merge stats */
	noi tab part;

	/* Exporting final merged file */
	save "`crime'\prep1\crime_allssr_merged.dta", replace;
} ;

capture log close;
